This document presents an initial exploration of the FAA flight delay data, starting with the 2015 Airline Service Quality Performance (ASQP) data.
# Size of the 2015 data: number of rows, then number of columns.
c(nrow(d_15), ncol(d_15))
## [1] 971365 55
# Column-by-column overview of the 2015 data.
# In the printed result, several numeric-looking columns (e.g. DEP_DELAY,
# DEP_DELAY_MINS, ARR_DELAY_MINS, TAXI_OUT, AIR_TIME) are summarised as
# level counts and include a literal "NULL" level, so they were not read in
# as numeric and need converting before any arithmetic.
# NOTE(review): the first column prints as `ï..ID`, which looks like a
# byte-order mark absorbed into the header on import -- confirm upstream.
summary(object = d_15)
## ï..ID YEAR QUARTER MONTH
## Min. : 1 Min. :2015 Min. :1.000 Min. : 1.000
## 1st Qu.: 242842 1st Qu.:2015 1st Qu.:1.000 1st Qu.: 1.000
## Median : 485683 Median :2015 Median :3.000 Median : 8.000
## Mean : 612321 Mean :2015 Mean :2.516 Mean : 5.581
## 3rd Qu.: 728524 3rd Qu.:2015 3rd Qu.:4.000 3rd Qu.:10.000
## Max. :4877622 Max. :2015 Max. :4.000 Max. :10.000
##
## DAY_OF_MONTH DAY_OF_WEEK FLIGHT_DATE
## Min. : 1.00 Min. :1.000 2015-01-02 00:00:00: 16741
## 1st Qu.: 8.00 1st Qu.:2.000 2015-01-05 00:00:00: 16548
## Median :16.00 Median :4.000 2015-01-04 00:00:00: 16352
## Mean :15.85 Mean :4.023 2015-10-15 00:00:00: 16190
## 3rd Qu.:23.00 3rd Qu.:6.000 2015-10-08 00:00:00: 16184
## Max. :31.00 Max. :7.000 2015-10-12 00:00:00: 16128
## (Other) :873222
## UNIQUE_CARRIER AIRLINE_ID CARRIER TAIL_NUM
## WN :204558 Min. :19393 WN :204558 : 3126
## AA :152840 1st Qu.:19790 AA :152840 N484HA : 544
## DL :138261 Median :19805 DL :138261 N492HA : 544
## OO : 96922 Mean :19945 OO : 96922 N491HA : 528
## EV : 92022 3rd Qu.:20355 EV : 92022 N477HA : 527
## UA : 84289 Max. :21171 UA : 84289 N488HA : 526
## (Other):202473 (Other):202473 (Other):965570
## FLIGHT_NUM ORIGIN ORIGIN_CITY_NAME
## Min. : 1 ATL : 61464 Chicago, IL : 66445
## 1st Qu.: 741 DFW : 52294 Atlanta, GA : 61464
## Median :1675 ORD : 52085 Dallas/Fort Worth, TX: 52294
## Mean :2157 LAX : 36289 New York, NY : 36573
## 3rd Qu.:3185 DEN : 33448 Los Angeles, CA : 36289
## Max. :9793 SFO : 27508 Houston, TX : 35949
## (Other):708277 (Other) :682351
## ORIGIN_STATE ORIGIN_STATE_FIPS ORIGIN_STATE_NAME ORIGIN_WAC
## TX :124213 Min. : 1.00 Texas :124213 Min. : 1.0
## CA :120088 1st Qu.:12.00 California:120088 1st Qu.:34.0
## FL : 76370 Median :25.00 Florida : 76370 Median :53.0
## IL : 69191 Mean :26.36 Illinois : 69191 Mean :55.9
## GA : 63849 3rd Qu.:42.00 Georgia : 63849 3rd Qu.:82.0
## NY : 45907 Max. :78.00 New York : 45907 Max. :93.0
## (Other):471747 (Other) :471747
## DEST DEST_CITY_NAME DEST_STATE
## ATL : 61377 Chicago, IL : 67282 TX :124531
## ORD : 52927 Atlanta, GA : 61377 CA :120155
## DFW : 52746 Dallas/Fort Worth, TX: 52746 FL : 76344
## LAX : 36437 Los Angeles, CA : 36437 IL : 69969
## DEN : 33412 New York, NY : 36377 GA : 63733
## SFO : 27540 Houston, TX : 35827 NY : 45755
## (Other):706926 (Other) :681319 (Other):470878
## DEST_STATE_FIPS DEST_STATE_NAME DEST_WAC CRS_DEP_TIME_HR
## Min. : 1.00 Texas :124531 Min. : 1.00 Min. : 0.00
## 1st Qu.:12.00 California:120155 1st Qu.:34.00 1st Qu.: 9.00
## Median :25.00 Florida : 76344 Median :53.00 Median :13.00
## Mean :26.35 Illinois : 69969 Mean :55.91 Mean :12.95
## 3rd Qu.:42.00 Georgia : 63733 3rd Qu.:82.00 3rd Qu.:17.00
## Max. :78.00 New York : 45755 Max. :93.00 Max. :23.00
## (Other) :470878
## CRS_DEP_TIME_MIN DEP_TIME_HR DEP_TIME_MIN DEP_DELAY
## Min. : 0.00 17 : 63211 55 : 22579 -3 : 79443
## 1st Qu.:10.00 8 : 61328 57 : 20944 -4 : 78215
## Median :25.00 10 : 60503 56 : 20938 -5 : 77817
## Mean :26.58 11 : 60333 58 : 20577 -2 : 74719
## 3rd Qu.:43.00 6 : 60177 54 : 20051 -1 : 65517
## Max. :59.00 13 : 59982 0 : 20038 -6 : 57721
## (Other):605831 (Other):846238 (Other):537933
## DEP_DELAY_MINS DEP_DELAY_15 DEP_DELAY_GRPS DEP_TIME_BLK
## 0 :628463 0 :801862 -1 :570024 0600-0659: 67449
## 1 : 26965 1 :155288 0 :228268 1700-1759: 67008
## 2 : 19906 NULL: 14215 1 : 61275 0700-0759: 66558
## 3 : 17464 2 : 29980 0800-0859: 66142
## 4 : 15306 3 : 18116 1300-1359: 62950
## NULL : 14215 NULL : 14215 1100-1159: 62591
## (Other):249046 (Other): 49487 (Other) :578667
## TAXI_OUT WHEELS_OFF WHEELS_ON TAXI_IN
## 12 : 76997 NULL : 14546 NULL : 15089 4 :154723
## 11 : 76263 610 : 1543 1634 : 1141 5 :153709
## 13 : 74087 608 : 1508 1853 : 1140 6 :123201
## 10 : 70594 611 : 1494 1641 : 1136 7 : 92360
## 14 : 67444 609 : 1460 1645 : 1136 3 : 88147
## 15 : 60759 612 : 1420 1628 : 1134 8 : 69870
## (Other):545221 (Other):949394 (Other):950589 (Other):289355
## CRS_ARR_TIME_HR CRS_ARR_TIME_MIN ARR_TIME_HR ARR_TIME_MIN
## Min. : 0.00 Min. : 0.00 16 : 63796 40 : 16288
## 1st Qu.:11.00 1st Qu.:14.00 18 : 61270 55 : 16278
## Median :15.00 Median :29.00 14 : 60204 54 : 16238
## Mean :14.72 Mean :28.74 20 : 59785 50 : 16190
## 3rd Qu.:19.00 3rd Qu.:45.00 19 : 59020 45 : 16173
## Max. :23.00 Max. :59.00 17 : 58970 51 : 16165
## (Other):608320 (Other):874033
## ARR_DELAY ARR_DELAY_MINS ARR_DELAY_15 ARR_DELAY_GRPS
## -8 : 30162 0 :626003 0 :795462 -1 :409469
## -9 : 30161 1 : 19287 1 :159320 -2 :195448
## -10 : 30146 2 : 17748 NULL: 16583 0 :190545
## -7 : 29290 NULL : 16583 1 : 65172
## -11 : 29246 3 : 16279 2 : 30487
## -6 : 28845 4 : 15307 3 : 17724
## (Other):793515 (Other):260158 (Other): 62520
## ARR_TIME_BLK CANCELLED CANCELLATION_CODE DIVERTED
## 1600-1659: 68376 Min. :0.00000 :956692 Min. :0.000000
## 1800-1859: 63125 1st Qu.:0.00000 A: 3962 1st Qu.:0.000000
## 1400-1459: 62786 Median :0.00000 B: 8067 Median :0.000000
## 1000-1059: 60465 Mean :0.01511 C: 2643 Mean :0.001966
## 2000-2059: 60267 3rd Qu.:0.00000 D: 1 3rd Qu.:0.000000
## 1200-1259: 60166 Max. :1.00000 Max. :1.000000
## (Other) :596180
## CRS_ELAPSED_TIME ACTUAL_ELAPSED_TIME AIR_TIME FLIGHTS
## Min. : 22.0 NULL : 16583 NULL : 16583 Min. :1
## 1st Qu.: 85.0 79 : 7909 43 : 8311 1st Qu.:1
## Median :123.0 81 : 7861 59 : 8211 Median :1
## Mean :141.8 80 : 7844 62 : 8211 Mean :1
## 3rd Qu.:175.0 76 : 7741 55 : 8177 3rd Qu.:1
## Max. :718.0 77 : 7699 57 : 8153 Max. :1
## (Other):915728 (Other):913719
## DISTANCE DISTANCE_GRP
## Min. : 31.0 Min. : 1.000
## 1st Qu.: 370.0 1st Qu.: 2.000
## Median : 647.0 Median : 3.000
## Mean : 819.6 Mean : 3.749
## 3rd Qu.:1066.0 3rd Qu.: 5.000
## Max. :4983.0 Max. :11.000
##
# Per-carrier, per-month flight counts and mean departure/arrival delay
# (in minutes).
#
# The summary() output above shows DEP_DELAY_MINS and ARR_DELAY_MINS as
# level counts with a literal "NULL" level, i.e. they are factors rather
# than numbers. as.numeric() applied directly to a factor returns the
# internal level codes, not the recorded minutes, so the values are first
# converted with as.character(). "NULL" entries are mapped to NA and dropped
# from the means via na.rm = TRUE; otherwise a single missing delay would
# turn the whole group mean into NA.
#
# n_flights still counts every row in the group, including flights whose
# delay is missing.
delay_summary <- d_15 %>%
  group_by(CARRIER, MONTH) %>%
  summarize(
    n_flights = n(),
    mean_dep_delay = mean(
      as.numeric(na_if(as.character(DEP_DELAY_MINS), "NULL")),
      na.rm = TRUE
    ),
    mean_arr_delay = mean(
      as.numeric(na_if(as.character(ARR_DELAY_MINS), "NULL")),
      na.rm = TRUE
    )
  )
# Scatter of mean arrival delay against month, one colour per carrier,
# rendered as an interactive plotly widget.
p1 <- ggplot(data = delay_summary) +
  geom_point(
    mapping = aes(x = MONTH, y = mean_arr_delay, color = CARRIER)
  ) +
  # facet_wrap(~CARRIER) +
  labs(title = "Mean arrival delay in minutes by carrier for 2015")

ggplotly(p1)